Load packages

# Load all required packages up front so missing dependencies fail fast.
library(tidyverse)    # data wrangling + ggplot2
library(janitor)
library(cowplot)      # theme_cowplot()
library(here)         # project-root-relative file paths
library(readxl)
library(Matrix)
library(lme4)         # mixed-effects models
library(lmerTest)     # Satterthwaite p-values for lmer models
library(TOSTER)       # equivalence tests (tsum_TOST)
library(eyetrackingR) # timecourse / cluster-based permutation analysis
theme_set(theme_cowplot())

# FIX: `warn` is not a knitr chunk option (the knitr option is `warning`,
# which is already set below), so the invalid `warn = FALSE` was removed.
knitr::opts_chunk$set(cache = FALSE, warning = FALSE, message = FALSE)

Load data

# Path to the processed, anonymized experiment data.
data_file_path <- here::here(
  "data_analysis", "registered_report", "data", "processed_data",
  "CATegories_exp2_processed_data_anonymized.csv"
)
d <- read_csv(data_file_path)

## Add general category properties from the typicality dataset.
## Stimulus-name lookup used to connect ratings to experiment image names:
animal_rating_names <- read.csv(here::here(
  "data_analysis", "registered_report", "data", "processed_data",
  "animal_ratings_stimuli_full.csv"
))

# All stimulus images that actually appear in the experiment (left or right).
animal_stims <- unique(c(unique(d$left_image), unique(d$right_image)))

# Typicality ratings, restricted to the images used in the experiment.
typicality <- read.csv(here::here(
  "data_analysis", "registered_report", "data", "processed_data",
  "typicality_animals_summarized.csv"
)) %>%
  mutate(item_name = str_remove(animal_name, pattern = " ")) %>%
  left_join(animal_rating_names) %>%
  mutate(image_name_resized = str_replace(image_experiment_name, ".jpg", "")) %>%
  filter(image_name_resized %in% animal_stims)

# Attach typicality to each trial's target image and apply the age bounds.
d <- d %>%
  left_join(
    typicality %>% select(image_name_resized, category, typicality_subjective),
    by = c("target_image" = "image_name_resized")
  ) %>%
  rename(typicality_condition = typicality_subjective) %>%
  # filter out children outside of the age bounds (age is in days)
  filter(400 < age & age < 600)

Summarize participant information

# Summarize subject information.
# One row per (participant, session); center the age variables for models.
subj_info_multisession <- d %>%
  distinct(sub_num, age, months, age_mo, child_gender, trial_order) %>%
  mutate(
    age_mo_c = age_mo - mean(age_mo),  # age in months, mean-centered
    age_c = age - mean(age)            # age in days, mean-centered
  )

# Participant-level counts (each child counted exactly once).
subj_info <- d %>%
  distinct(sub_num, child_gender) %>%
  summarize(
    N = n(),
    N_female = sum(child_gender == "f")
  )

# Combined overview: participants, sessions, age range, gender counts.
overall_subj_info <- subj_info_multisession %>%
  summarize(
    N = length(unique(sub_num)),
    sessions = n(),
    mean_age = mean(age_mo),
    min_age = min(age),
    max_age = max(age),
    sd_age = sd(age_mo)
  ) %>%
  left_join(subj_info)

overall_subj_info %>%
  knitr::kable()
N sessions mean_age min_age max_age sd_age N_female
133 235 15.70553 410 584 1.554924 67

Useable trial summary

In order for a trial to be included, participants must contribute at least 50% looking during the windows of interest when computing baseline-corrected proportion target looking: the critical window (300 ms - 2800 ms relative to target word onset) and the baseline window (-2000 ms - 0 ms relative to target word onset).

# Analysis windows (in ms, relative to target word onset).
critical_window <- c(300, 2800)
baseline_window <- c(-2000, 0)

# Per-trial usability during the critical window: a trial is useable when at
# least 50% of its frames contain a valid look (accuracy_transformed not NA).
summarize_subj_useable_trials_critical_window <- d %>%
  filter(
    corrected_time_centered >= critical_window[1],
    corrected_time_centered <= critical_window[2]
  ) %>%
  group_by(
    sub_num, age, age_mo, child_gender, session, trial_order, trial_number,
    target_image, target_typicality_z, condition
  ) %>%
  summarize(
    length_critical_window = n(),
    useable_frames_critical_window = sum(!is.na(accuracy_transformed)),
    percent_useable_critical_window =
      useable_frames_critical_window / length_critical_window,
    # useable if at least 50% looking
    useable_critical_window = as.numeric(percent_useable_critical_window >= 0.5),
    mean_target_looking_critical = mean(accuracy_transformed, na.rm = TRUE)
  )

# Per-trial usability during the baseline window (same 50% criterion).
summarize_subj_useable_trials_baseline_window <- d %>%
  filter(
    corrected_time_centered >= baseline_window[1],
    corrected_time_centered <= baseline_window[2]
  ) %>%
  group_by(
    sub_num, session, age, age_mo, child_gender, trial_order, trial_number,
    target_image, target_typicality_z, condition
  ) %>%
  summarize(
    length_baseline_window = n(),
    useable_frames_baseline_window = sum(!is.na(accuracy_transformed)),
    percent_useable_baseline_window =
      useable_frames_baseline_window / length_baseline_window,
    # useable if at least 50% looking
    useable_baseline_window = as.numeric(percent_useable_baseline_window >= 0.5),
    mean_target_looking_baseline = mean(accuracy_transformed, na.rm = TRUE)
  )

# Overall trial usability: a trial must meet the 50% criterion in BOTH
# windows, and baseline-corrected looking is critical minus baseline.
summarize_subj_useable_trials <- summarize_subj_useable_trials_critical_window %>%
  left_join(summarize_subj_useable_trials_baseline_window) %>%
  mutate(
    useable_window = ifelse(
      useable_baseline_window == 1 & useable_critical_window == 1, 1, 0
    ),
    corrected_target_looking =
      mean_target_looking_critical - mean_target_looking_baseline
  )
  

# Count useable trials per participant-session.
summarize_useable_trials <- summarize_subj_useable_trials %>%
  group_by(sub_num, age, child_gender, session, trial_order) %>%
  summarize(
    num_useable_trials = sum(useable_window),
    num_useable_trials_critical_window = sum(useable_critical_window)
  )

# Total trials per participant across sessions; flag participants who
# contributed fewer than 24 useable trials (half the experimental trials).
summarize_subj_trials <- summarize_useable_trials %>%
  ungroup() %>%
  group_by(sub_num) %>%
  summarize(
    session_num = n(),
    total_trials = sum(num_useable_trials),
    total_trials_critical_window = sum(num_useable_trials_critical_window),
    exclude_participant = as.numeric(total_trials < 24),
    exclude_participant_critical = as.numeric(total_trials_critical_window < 24)
  )

# Average number of useable trials contributed per participant.
mean(summarize_subj_trials$total_trials)
## [1] 29.25564
# Number of participants excluded for insufficient data contribution.
sum(summarize_subj_trials$exclude_participant)
## [1] 47
# Join the participant- and trial-level summaries back onto the main data.
summarize_useable_trials <- summarize_useable_trials %>%
  left_join(summarize_subj_trials)
d <- d %>%
  left_join(summarize_useable_trials) %>%
  left_join(summarize_subj_useable_trials)

# Wide-format useable-trial summary: one row per participant, one
# num_trials_session_* column per session.
# FIX: the previous version wrapped this in a redundant group_by();
# pivot_wider() derives its id columns from the remaining columns, so the
# grouping had no effect on the result and has been removed.
summarize_useable_trials_wide <- summarize_useable_trials %>%
  ungroup() %>%
  select(sub_num, session_num, total_trials, exclude_participant,
         session, num_useable_trials) %>%
  pivot_wider(
    names_from = "session",
    names_prefix = "num_trials_session_",
    values_from = "num_useable_trials"
  )

# Write out the useable trial summary.
write_csv(
  summarize_useable_trials_wide,
  here::here("data_analysis", "registered_report", "data", "processed_data",
             "CATegories_exp2_useable_trial_summary.csv")
)

Overall, among the trials contributed by the 133 participants, 82.5% of trials contained sufficient looking to meet our trial-level inclusion criteria (at least 50% looking during both the baseline window and the critical window). 86 of the 133 participants contributed sufficient looking data on at least half of the experimental trials (overall M = 29.3).

Summarize subject-level accuracy

Overall

Here, we summarize each participant’s average accuracy during the critical window and average baseline-corrected proportion target looking.

# Critical window only
## Trial-level: mean accuracy within the critical window (300-2800 ms).
trial_critical_window_accuracy <- d %>%
  filter(exclude_participant_critical == 0) %>%
  filter(useable_critical_window == 1) %>%
  filter(corrected_time_centered >= 300 & corrected_time_centered <= 2800) %>%
  group_by(sub_num, age, age_mo, child_gender, trial_order, trial_number,
           category, target_image, target_typicality_z, condition) %>%
  summarize(mean_accuracy = mean(accuracy_transformed, na.rm = TRUE))

## Subject-level average with 95% t-based confidence intervals.
avg_critical_window_accuracy <- trial_critical_window_accuracy %>%
  ungroup() %>%
  group_by(sub_num, child_gender) %>%
  summarize(
    N = n(),
    mean_age = mean(age),
    mean_age_mo = mean(age_mo),
    accuracy = mean(mean_accuracy, na.rm = TRUE),
    ci = qt(0.975, N - 1) * sd(mean_accuracy, na.rm = TRUE) / sqrt(N),
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  )

# Baseline-corrected target looking
## Trial-level: one row per useable trial with its corrected looking score.
trial_corrected_accuracy <- d %>%
  filter(exclude_participant == 0) %>%
  filter(useable_window == 1) %>%
  distinct(sub_num, session, age, age_mo, child_gender, trial_order,
           trial_number, category, target_image, target_typicality_z,
           condition, mean_target_looking_critical,
           mean_target_looking_baseline, corrected_target_looking)

## Subject-level average with 95% t-based confidence intervals.
avg_corrected_target_looking <- trial_corrected_accuracy %>%
  group_by(sub_num, child_gender) %>%
  summarize(
    N = n(),
    average_corrected_target_looking = mean(corrected_target_looking, na.rm = TRUE),
    ci = qt(0.975, N - 1) * sd(corrected_target_looking, na.rm = TRUE) / sqrt(N),
    lower_ci = average_corrected_target_looking - ci,
    upper_ci = average_corrected_target_looking + ci
  )

By Typicality Condition

Here, we summarize each participant’s average accuracy during the critical window and average baseline-corrected proportion target looking, separately by typicality condition.

# Critical window only, split by typicality condition.
avg_critical_window_accuracy_by_typicality <- d %>%
  filter(exclude_participant_critical == 0) %>%
  filter(useable_critical_window == 1) %>%
  filter(corrected_time_centered >= 300 & corrected_time_centered <= 2800) %>%
  group_by(sub_num, age, age_mo, child_gender, trial_order, trial_number,
           target_image, target_typicality_z, condition) %>%
  summarize(mean_accuracy = mean(accuracy_transformed, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(sub_num, child_gender, condition) %>%
  summarize(
    N = n(),
    mean_age = mean(age),
    mean_age_mo = mean(age_mo),
    accuracy = mean(mean_accuracy, na.rm = TRUE),
    ci = qt(0.975, N - 1) * sd(mean_accuracy, na.rm = TRUE) / sqrt(N),
    lower_ci = accuracy - ci,
    upper_ci = accuracy + ci
  )

# Baseline-corrected target looking, split by typicality condition.
avg_corrected_target_looking_by_typicality <- d %>%
  filter(exclude_participant == 0) %>%
  filter(useable_window == 1) %>%
  distinct(sub_num, age, age_mo, child_gender, trial_order, trial_number,
           target_image, target_typicality_z, condition,
           mean_target_looking_critical, mean_target_looking_baseline,
           corrected_target_looking) %>%
  group_by(sub_num, child_gender, condition) %>%
  summarize(
    N = n(),
    average_corrected_target_looking = mean(corrected_target_looking, na.rm = TRUE),
    ci = qt(0.975, N - 1) * sd(corrected_target_looking, na.rm = TRUE) / sqrt(N),
    lower_ci = average_corrected_target_looking - ci,
    upper_ci = average_corrected_target_looking + ci
  )

Models

Aim 1

1.1

# Contrast codings for typicality condition:
#   *_c    centered (-0.5 / +0.5): intercept = grand mean
#   *_typ  typical = 0 (atypical = -1): intercept = typical-condition mean
#   *_atyp atypical = 0 (typical = 1): intercept = atypical-condition mean
# The reference-level codings are exact +/- 0.5 shifts of the centered code.
avg_corrected_target_looking_by_typicality <- avg_corrected_target_looking_by_typicality %>%
  mutate(
    typicality_condition_c = case_when(
      condition == "atypical" ~ -0.5,
      condition == "typical" ~ 0.5,
      TRUE ~ NA_real_
    ),
    typicality_condition_typ = typicality_condition_c - 0.5,
    typicality_condition_atyp = typicality_condition_c + 0.5
  )

# Model 1.1: is baseline-corrected target looking above zero overall
# (intercept), and does it differ by typicality (centered contrast)?
m_1_1 <- lmer(
  average_corrected_target_looking ~ 1 + typicality_condition_c + (1|sub_num),
  data = avg_corrected_target_looking_by_typicality
)

summary(m_1_1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: average_corrected_target_looking ~ 1 + typicality_condition_c +  
##     (1 | sub_num)
##    Data: avg_corrected_target_looking_by_typicality
## 
## REML criterion at convergence: -318.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.29665 -0.49235  0.08458  0.57549  2.54722 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sub_num  (Intercept) 0.003003 0.05480 
##  Residual             0.006035 0.07769 
## Number of obs: 172, groups:  sub_num, 86
## 
## Fixed effects:
##                         Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)             0.066998   0.008367 85.000000   8.007 5.47e-12 ***
## typicality_condition_c  0.021514   0.011847 85.000000   1.816   0.0729 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## typclty_cn_ 0.000
# Wald 95% confidence intervals for the fixed effects of model 1.1.
confint(m_1_1, method = "Wald")
##                               2.5 %     97.5 %
## .sig01                           NA         NA
## .sigma                           NA         NA
## (Intercept)             0.050598842 0.08339743
## typicality_condition_c -0.001706766 0.04473383
1.1.1.

Yes, infants significantly recognized the target word.

1.1.2

No significant effect of typicality

Equivalence test — the TOST was non-significant, so we cannot reject the non-equivalence null hypothesis (i.e., we cannot conclude that the typicality effect is statistically equivalent to zero).

# Per-subject typical-minus-atypical difference, then its mean and SD across
# subjects (inputs for the one-sample equivalence test below).
overall_condition_summary <- avg_corrected_target_looking_by_typicality %>%
  group_by(sub_num) %>%
  summarize(
    condition_diff =
      average_corrected_target_looking[condition == "typical"] -
      average_corrected_target_looking[condition == "atypical"]
  ) %>%
  ungroup() %>%
  summarize(
    N = n(),
    diff = mean(condition_diff),
    sd = sd(condition_diff)
  )

# One-sample TOST equivalence test with standardized bounds of +/- 0.25 SMD.
tsum_TOST(
  m1 = overall_condition_summary$diff,
  sd1 = overall_condition_summary$sd,
  n1 = overall_condition_summary$N,
  eqb = 0.25,
  eqbound_type = "SMD"
)
## 
## One-sample t-Test
## 
## The equivalence test was non-significant, t(85) = -0.503, p = 3.08e-01
## The null hypothesis test was non-significant, t(85) = 1.816, p = 7.29e-02
## NHST: don't reject null significance hypothesis that the effect is equal to zero 
## TOST: don't reject null equivalence hypothesis
## 
## TOST Results 
##                  t df p.value
## t-test      1.8159 85   0.073
## TOST Lower  4.1343 85 < 0.001
## TOST Upper -0.5025 85   0.308
## 
## Effect Sizes 
##            Estimate      SE             C.I. Conf. Level
## Raw         0.02151 0.01185 [0.0018, 0.0412]         0.9
## Hedges's g  0.19408 0.10884   [0.016, 0.371]         0.9
## Note: SMD confidence intervals are an approximation. See vignette("SMD_calcs").
1.1.3
# Model with the typical condition as the reference level
# (intercept = typical-condition mean).
m_1_1_3_typ <- lmer(
  average_corrected_target_looking ~ 1 + typicality_condition_typ + (1|sub_num),
  data = avg_corrected_target_looking_by_typicality
)

summary(m_1_1_3_typ)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: average_corrected_target_looking ~ 1 + typicality_condition_typ +  
##     (1 | sub_num)
##    Data: avg_corrected_target_looking_by_typicality
## 
## REML criterion at convergence: -318.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.29665 -0.49235  0.08458  0.57549  2.54722 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sub_num  (Intercept) 0.003003 0.05480 
##  Residual             0.006035 0.07769 
## Number of obs: 172, groups:  sub_num, 86
## 
## Fixed effects:
##                           Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                0.07775    0.01025 153.09909   7.585 3.01e-12 ***
## typicality_condition_typ   0.02151    0.01185  85.00000   1.816   0.0729 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## typclty_cn_ 0.578
# Wald 95% confidence intervals for the typical-reference model.
confint(m_1_1_3_typ, method = "Wald")
##                                 2.5 %     97.5 %
## .sig01                             NA         NA
## .sigma                             NA         NA
## (Intercept)               0.057661808 0.09784799
## typicality_condition_typ -0.001706766 0.04473383

Infants successfully recognize words in the typical condition.

# Model with the atypical condition as the reference level
# (intercept = atypical-condition mean).
m_1_1_3_atyp <- lmer(
  average_corrected_target_looking ~ 1 + typicality_condition_atyp + (1|sub_num),
  data = avg_corrected_target_looking_by_typicality
)

summary(m_1_1_3_atyp)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: average_corrected_target_looking ~ 1 + typicality_condition_atyp +  
##     (1 | sub_num)
##    Data: avg_corrected_target_looking_by_typicality
## 
## REML criterion at convergence: -318.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.29665 -0.49235  0.08458  0.57549  2.54722 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sub_num  (Intercept) 0.003003 0.05480 
##  Residual             0.006035 0.07769 
## Number of obs: 172, groups:  sub_num, 86
## 
## Fixed effects:
##                            Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                 0.05624    0.01025 153.09909   5.486 1.66e-07 ***
## typicality_condition_atyp   0.02151    0.01185  85.00000   1.816   0.0729 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## typclty_cn_ -0.578
# Wald 95% confidence intervals for the atypical-reference model.
confint(m_1_1_3_atyp, method = "Wald")
##                                  2.5 %     97.5 %
## .sig01                              NA         NA
## .sigma                              NA         NA
## (Intercept)                0.036148277 0.07633446
## typicality_condition_atyp -0.001706766 0.04473383

Infants successfully recognize words in the atypical condition.

1.2

# Mirror the subject-level condition contrast codings at the trial level
# (centered, typical-reference, atypical-reference); the reference codings
# are exact +/- 0.5 shifts of the centered code.
trial_corrected_accuracy <- trial_corrected_accuracy %>%
  mutate(
    typicality_condition_c = case_when(
      condition == "atypical" ~ -0.5,
      condition == "typical" ~ 0.5,
      TRUE ~ NA_real_
    ),
    typicality_condition_typ = typicality_condition_c - 0.5,
    typicality_condition_atyp = typicality_condition_c + 0.5
  )

# Model 1.2: trial-level baseline-corrected looking with by-subject random
# slopes for condition and a random intercept for animal category.
m_1_2 <- lmer(
  corrected_target_looking ~ 1 + typicality_condition_c +
    (1 + typicality_condition_c|sub_num) +
    (1|category),
  data = trial_corrected_accuracy
)
summary(m_1_2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: corrected_target_looking ~ 1 + typicality_condition_c + (1 +  
##     typicality_condition_c | sub_num) + (1 | category)
##    Data: trial_corrected_accuracy
## 
## REML criterion at convergence: 2332.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.15368 -0.63205 -0.02159  0.67215  2.77470 
## 
## Random effects:
##  Groups   Name                   Variance  Std.Dev. Corr
##  sub_num  (Intercept)            0.0028254 0.05315      
##           typicality_condition_c 0.0001944 0.01394  1.00
##  category (Intercept)            0.0001320 0.01149      
##  Residual                        0.1194800 0.34566      
## Number of obs: 3173, groups:  sub_num, 86; category, 4
## 
## Fixed effects:
##                         Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            6.811e-02  1.021e-02 6.265e+00   6.673 0.000458 ***
## typicality_condition_c 1.772e-02  1.238e-02 1.078e+03   1.431 0.152634    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## typclty_cn_ 0.068 
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

1.3

Resampling

In order to plot participants’ average proportion looking to the target across the trial, we smooth/resample time. This is necessary when plotting the timecourses given the variable sampling rate in the data (otherwise the mean observations “jump around” due to varying contributing data composition at different time points).

# Target output sampling rate: 30 fps (~33.33 ms per frame).
target_ms_per_frame=1000/30
# Adapted from: https://github.com/langcog/peekds/blob/master/R/generate_aoi.R
# Resample one trial's looking data onto a uniform 30 fps timebase.
# df_trial: data for a single (participant, trial) with columns
#   corrected_time_centered (ms), accuracy_transformed (0/1/NA),
#   trial_number, and sub_num.
# Returns a tibble with one row per resampled frame; frames landing on
# missing data come back as NA in accuracy_transformed.
resample_trial <- function(df_trial) {
  t_origin <- df_trial$corrected_time_centered
  data_origin <- df_trial$accuracy_transformed

  # create the new timestamps for resampling: snap the start down to a
  # multiple of the frame duration, then step one frame at a time up to the
  # last observed timestamp
  t_start <- min(t_origin) - (min(t_origin) %% target_ms_per_frame)
  t_resampled <- seq(from = t_start, to = max(t_origin),
                     by = target_ms_per_frame)

  # replace NA with the sentinel value 2 so stats::approx(), which cannot
  # handle missing y-values, can be applied directly
  # this step critical for interpolating missing vals quickly and correctly
  aoi_num <- data_origin %>%
    dplyr::recode(.missing = 2) #recode NA as 2
    
  # start resampling with approx; method = "constant" carries the previous
  # observation forward (no averaging of the 0/1/2 codes), and rule = 2
  # extends the first/last observation beyond the observed time range
  aoi_resampled <- stats::approx(x = t_origin, y = aoi_num, xout = t_resampled,
                                 method = "constant", rule = 2,
                                 ties = "ordered")$y
  # map the sentinel back: "missing" is not parseable as a number, so the
  # final as.numeric() turns it into NA, restoring the original NA coding
  aoi_resampled_recoded <- aoi_resampled %>%
    dplyr::recode("0"="0","1"="1","2" = "missing") %>%
    as.numeric()

  # adding back the columns to match schema
  dplyr::tibble(corrected_time_centered = t_resampled,
                accuracy_transformed = aoi_resampled_recoded,
                trial_number = df_trial$trial_number[1],
                sub_num = df_trial$sub_num[1])
}

# Resample every (participant, trial) independently, then reattach the
# trial-level metadata that resample_trial() drops.
d_resampled <- d %>%
  dplyr::mutate(sub_num_trial_number = paste(.data$sub_num,
                                             .data$trial_number, sep = "_")) %>%
  split(.$sub_num_trial_number) %>%
  purrr::map_df(resample_trial) %>%
  dplyr::arrange(.data$sub_num, .data$trial_number)

# Trial-level metadata to join back onto the resampled frames.
# FIX: `trial_order` was listed twice in the distinct() call; the duplicate
# has been removed (no change in behavior).
d_info <- d %>%
  select(-corrected_time_centered, -accuracy_transformed) %>%
  distinct(sub_num, exclude_participant, useable_window, age, age_mo,
           child_gender, trial_order, condition, trial_number,
           target_image, target_typicality_z)

d_resampled <- d_resampled %>%
  left_join(d_info) %>%
  mutate(corrected_time_centered = round(corrected_time_centered, 0))
Timecourse analysis - cluster-based permutation analysis

Next, we prepare the data for use with the eyetrackingR package

# Prepare the data for eyetrackingR: explicit target / distractor / trackloss
# columns, then register the column roles with make_eyetrackingr_data().
d_eyetrackingr <- d_resampled %>%
  filter(exclude_participant == 0) %>%
  filter(useable_window == 1) %>%
  # trial_number alone is not unique across orders, so build a unique trial id
  unite("unique_trial", trial_order, trial_number, sep = "_", remove = FALSE) %>%
  mutate(
    target = case_when(
      is.na(accuracy_transformed) ~ NA,
      accuracy_transformed == 1 ~ TRUE,
      accuracy_transformed == 0 ~ FALSE
    ),
    distractor = case_when(
      is.na(accuracy_transformed) ~ NA,
      accuracy_transformed == 0 ~ TRUE,
      accuracy_transformed == 1 ~ FALSE
    ),
    # frames with no valid look count as trackloss
    trackloss = is.na(accuracy_transformed)
  ) %>%
  make_eyetrackingr_data(
    participant_column = "sub_num",
    trial_column = "unique_trial",
    time_column = "corrected_time_centered",
    trackloss_column = "trackloss",
    aoi_columns = c("target", "distractor"),
    treat_non_aoi_looks_as_missing = TRUE
  )

# Restrict to the critical window (300-2800 ms after target word onset).
response_window <- subset_by_window(
  d_eyetrackingr,
  window_start_time = 300,
  window_end_time = 2800,
  rezero = FALSE
)
summary_data_loss <- describe_data(response_window, 'target', 'sub_num')

# Bin proportion target looking into 100 ms bins, one series per subject.
response_time <- make_time_sequence_data(
  response_window,
  time_bin_size = 100,
  predictor_columns = c("condition"),
  aois = "target",
  summarize_by = "sub_num"
)

# visualize timecourse
plot(response_time, predictor_column = "condition") +
  theme_light() +
  coord_cartesian(ylim = c(0, 1))

#divergence analysis
# tb_analysis <- analyze_time_bins(data = response_time, predictor_column = "condition", test= 'boot_splines', within_subj = TRUE, bs_samples = 1000, alpha = .05/num_time_bins)
# plot(tb_analysis) + theme_light()
# summary(tb_analysis)

# Bootstrapped cluster-based permutation analysis: find runs of adjacent time
# bins whose paired t-statistic exceeds the threshold, then compare cluster
# mass against a permutation null distribution.
n_samples <- 100
threshold_t <- 2
df_timeclust <- make_time_cluster_data(
  response_time,
  test = "t.test", paired = TRUE,
  predictor_column = "condition",
  threshold = threshold_t
)
plot(df_timeclust) + ylab("T-Statistic") + theme_light()

summary(df_timeclust)
## Test Type:    t.test 
## Predictor:    condition 
## Formula:  Prop ~ condition 
## Summary of Clusters ======
## [1] Cluster   Direction EndTime  
## <0 rows> (or 0-length row.names)
clust_analysis <- analyze_time_clusters(df_timeclust, within_subj = TRUE,
                                        paired = TRUE, samples = n_samples)
plot(clust_analysis) + theme_light()

summary(clust_analysis)
## Test Type:    t.test 
## Predictor:    condition 
## Formula:  Prop ~ condition 
## Null Distribution   ====== 
##  Mean:        -0.1232 
##  2.5%:        -9.8299 
## 97.5%:        10.6581 
## Summary of Clusters ======
## [1] Cluster     Direction   EndTime     Probability
## <0 rows> (or 0-length row.names)
Timecourse Plot

Next, we plot the data. First we summarize the data in two steps: (1) summarize the data by subject for each time point, followed by (2) averaging looking for each time point across subjects.

# Summarize within subject for each time point.
summarize_subj <- d_resampled %>%
  filter(exclude_participant == 0) %>%
  filter(useable_window == 1) %>%
  group_by(sub_num, age, age_mo, child_gender, trial_order,
           corrected_time_centered) %>%
  summarize(
    N = n(),
    non_na_n = sum(!is.na(accuracy_transformed)),
    mean_accuracy = mean(accuracy_transformed, na.rm = TRUE),
    ci = qt(0.975, non_na_n - 1) * sd(accuracy_transformed, na.rm = TRUE) /
      sqrt(non_na_n),
    lower_ci = mean_accuracy - ci,
    upper_ci = mean_accuracy + ci
  ) %>%
  ungroup()

# Summarize across subjects for each time point.
summarize_across_subj <- summarize_subj %>%
  group_by(corrected_time_centered) %>%
  dplyr::summarize(
    n = n(),
    accuracy = mean(mean_accuracy, na.rm = TRUE),
    sd_accuracy = sd(mean_accuracy, na.rm = TRUE),
    se_accuracy = sd_accuracy / sqrt(n)
  )

# Overall timecourse of proportion target looking.
ggplot(summarize_across_subj, aes(corrected_time_centered, accuracy)) +
  xlim(-2000, 4000) +
  geom_smooth(method = "gam") +
  geom_errorbar(aes(ymin = accuracy - se_accuracy,
                    ymax = accuracy + se_accuracy), width = 0) +
  geom_point() +
  geom_vline(xintercept = 0, size = 1.5) +
  geom_hline(yintercept = 0.5, size = 1.2, linetype = "dashed") +
  geom_vline(xintercept = 300, linetype = "dotted") +
  ylim(0.35, 0.65)

ggsave(here::here("figures","overall_accuracy.png"))

Timecourse by age

# Timecourse split into two age groups at a 16-month cutoff.
summarize_across_subj_by_age <- summarize_subj %>%
  mutate(age_group = ifelse(age_mo > 16,
                            "older than 16 months",
                            "younger than 16 months")) %>%
  group_by(age_group, corrected_time_centered) %>%
  dplyr::summarize(
    n = n(),
    accuracy = mean(mean_accuracy, na.rm = TRUE),
    sd_accuracy = sd(mean_accuracy, na.rm = TRUE),
    se_accuracy = sd_accuracy / sqrt(n)
  )
ggplot(summarize_across_subj_by_age, aes(corrected_time_centered, accuracy)) +
  xlim(-2000, 4000) +
  geom_smooth(method = "gam") +
  geom_errorbar(aes(ymin = accuracy - se_accuracy,
                    ymax = accuracy + se_accuracy), width = 0) +
  geom_point() +
  geom_vline(xintercept = 0, size = 1.5) +
  geom_hline(yintercept = 0.5, size = 1.2, linetype = "dashed") +
  geom_vline(xintercept = 300, linetype = "dotted") +
  facet_wrap(~age_group)

ggsave(here::here("figures","overall_accuracy_by_age.png"),
       width = 12, height = 9)

Timecourse by condition

# Summarize within subject for each time point, separately by condition.
summarize_subj_condition <- d_resampled %>%
  filter(exclude_participant == 0) %>%
  filter(useable_window == 1) %>%
  group_by(sub_num, age, age_mo, child_gender, condition, trial_order,
           corrected_time_centered) %>%
  summarize(mean_accuracy = mean(accuracy_transformed, na.rm = TRUE))

# Summarize across subjects for each condition x time point.
# FIX: this summary was previously computed twice with byte-identical code
# (once before and once after num_subjects); the duplicate has been removed.
summarize_across_subj_cond <- summarize_subj_condition %>%
  group_by(condition, corrected_time_centered) %>%
  summarize(
    n = n(),
    accuracy = mean(mean_accuracy, na.rm = TRUE),
    sd_accuracy = sd(mean_accuracy, na.rm = TRUE),
    se_accuracy = sd_accuracy / sqrt(n)
  )

# NOTE(review): `n` above counts subject-level rows per condition x time
# point, so max(n) is the largest per-bin count — confirm it is intended as
# a subject count before relying on `num_subjects`.
num_subjects <- summarize_across_subj_cond %>%
  group_by() %>%
  summarize(max_subnum = max(n))

# Timecourse by typicality condition, with the baseline and critical windows
# shaded and annotated.
ggplot(summarize_across_subj_cond,
       aes(corrected_time_centered, accuracy, color = condition)) +
  xlim(-2500, 4000) +
  # shade the critical window
  geom_rect(data = data.frame(xmin = 300, xmax = 2800,
                              ymin = -Inf, ymax = Inf),
            aes(x = NULL, y = NULL, xmin = xmin, xmax = xmax,
                ymin = ymin, ymax = ymax, color = NULL),
            fill = "grey", alpha = 0.2) +
  # shade the baseline window
  geom_rect(data = data.frame(xmin = -2000, xmax = 0,
                              ymin = -Inf, ymax = Inf),
            aes(x = NULL, y = NULL, xmin = xmin, xmax = xmax,
                ymin = ymin, ymax = ymax, color = NULL),
            fill = "grey", alpha = 0.2) +
  geom_errorbar(aes(ymin = accuracy - se_accuracy,
                    ymax = accuracy + se_accuracy), width = 0) +
  geom_point(alpha = 0.5) +
  geom_smooth(data = summarize_subj_condition, aes(y = mean_accuracy),
              method = "gam") +
  geom_vline(xintercept = 0, size = 1.5) +
  geom_hline(yintercept = 0.5, size = 1.2, linetype = "dashed") +
  geom_vline(xintercept = 300, linetype = "dotted") +
  geom_vline(xintercept = 2800, linetype = "dotted") +
  geom_vline(xintercept = -2000, linetype = "dotted") +
  geom_vline(xintercept = 0, linetype = "dotted") +
  theme(legend.position = c(0.75, 0.15)) +
  annotate("text", label = "Critical Window", x = 1550, y = 0.9) +
  annotate("text", label = "Baseline Window", x = -1000, y = 0.9) +
  ylim(0, 1) +
  ylab("Proportion Target Looking") +
  xlab("Time (centered on target word onset, in ms)")

ggsave(here::here("figures","typicality_accuracy.png"), width = 10, height = 6)

Aim 2

Change across age

# Attach the centered age covariates to the trial-level data.
trial_corrected_accuracy <- trial_corrected_accuracy %>%
  left_join(subj_info_multisession)

# Model 2: typicality x age (months, centered) interaction on
# baseline-corrected target looking.
m_2 <- lmer(
  corrected_target_looking ~ 1 + typicality_condition_c * age_mo_c +
    (1 + typicality_condition_c|sub_num) +
    (1|category),
  data = trial_corrected_accuracy
)
summary(m_2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: corrected_target_looking ~ 1 + typicality_condition_c * age_mo_c +  
##     (1 + typicality_condition_c | sub_num) + (1 | category)
##    Data: trial_corrected_accuracy
## 
## REML criterion at convergence: 2343
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.11458 -0.64949 -0.01878  0.67391  2.80414 
## 
## Random effects:
##  Groups   Name                   Variance  Std.Dev. Corr
##  sub_num  (Intercept)            0.0025090 0.05009      
##           typicality_condition_c 0.0001211 0.01101  1.00
##  category (Intercept)            0.0001351 0.01163      
##  Residual                        0.1195285 0.34573      
## Number of obs: 3173, groups:  sub_num, 86; category, 4
## 
## Fixed effects:
##                                  Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                     6.683e-02  1.008e-02 5.859e+00   6.630 0.000626
## typicality_condition_c          1.688e-02  1.239e-02 1.380e+03   1.362 0.173399
## age_mo_c                        1.209e-02  5.417e-03 8.980e+01   2.231 0.028157
## typicality_condition_c:age_mo_c 7.505e-03  8.220e-03 1.441e+03   0.913 0.361437
##                                    
## (Intercept)                     ***
## typicality_condition_c             
## age_mo_c                        *  
## typicality_condition_c:age_mo_c    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) typc__ ag_m_c
## typclty_cn_  0.050              
## age_mo_c    -0.060  0.000       
## typclt__:__  0.000 -0.086  0.058
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Quick Visualization

# Quick visual checks of the age trends, by condition.
ggplot(trial_corrected_accuracy,
       aes(age, corrected_target_looking, color = condition)) +
  geom_point(alpha = 0.1) +
  geom_smooth()

ggplot(trial_critical_window_accuracy,
       aes(age, mean_accuracy, color = condition)) +
  geom_point(alpha = 0.1) +
  geom_smooth()

# Alternative model: raw critical-window looking with baseline looking as a
# covariate, instead of subtracting baseline.
m <- lmer(
  mean_target_looking_critical ~ 1 + typicality_condition_c * age_mo_c +
    mean_target_looking_baseline +
    (1 + typicality_condition_c|sub_num) +
    (1|category),
  data = trial_corrected_accuracy
)
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## mean_target_looking_critical ~ 1 + typicality_condition_c * age_mo_c +  
##     mean_target_looking_baseline + (1 + typicality_condition_c |  
##     sub_num) + (1 | category)
##    Data: trial_corrected_accuracy
## 
## REML criterion at convergence: 963.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.43074 -0.63866  0.04833  0.68425  2.00196 
## 
## Random effects:
##  Groups   Name                   Variance Std.Dev. Corr
##  sub_num  (Intercept)            0.002184 0.046735     
##           typicality_condition_c 0.000045 0.006708 1.00
##  category (Intercept)            0.000996 0.031560     
##  Residual                        0.076816 0.277156     
## Number of obs: 3173, groups:  sub_num, 86; category, 4
## 
## Fixed effects:
##                                  Estimate Std. Error        df t value Pr(>|t|)
## (Intercept)                     5.111e-01  2.032e-02 6.806e+00  25.159 5.71e-08
## typicality_condition_c          1.686e-02  9.916e-03 1.881e+03   1.700   0.0893
## age_mo_c                        1.158e-02  4.656e-03 8.858e+01   2.488   0.0147
## mean_target_looking_baseline    9.354e-02  2.172e-02 3.124e+03   4.308 1.70e-05
## typicality_condition_c:age_mo_c 5.763e-03  6.579e-03 1.943e+03   0.876   0.3811
##                                    
## (Intercept)                     ***
## typicality_condition_c          .  
## age_mo_c                        *  
## mean_target_looking_baseline    ***
## typicality_condition_c:age_mo_c    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) typc__ ag_m_c mn_t__
## typclty_cn_  0.018                     
## age_mo_c    -0.026  0.000              
## mn_trgt_lk_ -0.524  0.000  0.002       
## typclt__:__ -0.003 -0.086  0.047  0.006
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Accuracy across age

Critical Window Only

# Subject-level critical-window accuracy plotted against age (in days).
# FIX: the previous version passed `width = 0` to geom_pointrange(), which is
# not a geom_pointrange() parameter and only produced an "ignoring unknown
# parameters" warning; it has been removed (plot output is unchanged).
ggplot(filter(avg_critical_window_accuracy, mean_age < 700),
       aes(mean_age, accuracy)) +
  geom_pointrange(aes(ymin = lower_ci, ymax = upper_ci),
                  position = position_jitter(width = 0.1),
                  size = 1.5) +
  geom_hline(yintercept = 0.5, linetype = "dashed") +
  geom_smooth(method = "lm") +
  xlab("Age (in days)") +
  ylab("Proportion Target Looking\nduring the Critical Window") +
  ylim(0, 1)

ggsave(here::here("figures","age_relationship_critical_window_accuracy.png"),
       width = 7, height = 6)

Baseline-corrected proportion target looking

# ggplot(avg_corrected_target_looking,aes(age,average_corrected_target_looking))+
#   geom_pointrange(aes(ymin=lower_ci,ymax=upper_ci), 
#                   position=position_jitter(width=0.1),
#                   width=0,
#                   size=1.5) +
#   geom_hline(yintercept=0,linetype="dashed")+
#   geom_smooth(method="lm")+
#   xlab("Age (in months)")+
#   ylab("Baseline-Corrected Proportion Target Looking")+
#   ylim(-0.55,0.55)+
#   scale_x_continuous(breaks=seq(12,18,1))
# ggsave(here::here("figures","age_relationship_baseline_corrected_accuracy.png"),width=7,height=6)

Target Looking By Item

Next, we investigate item-level (target word) variation in proportion target looking.

Overall target looking during critical window and baseline window

First, we inspect overall target looking in the critical window and in the baseline window. Note the baseline effects, such that dog and cat are more likely to be fixated during baseline than bird and fish.

#average target looking during the baseline and critical window by item for each subject
# Reduces trial-level rows to one row per subject x category, with 95% t-based
# CIs around both the critical-window and baseline means.
# Fix: na.rm=T -> na.rm=TRUE throughout (T is a reassignable binding, not a keyword).
# NOTE(review): N = n() counts all trials, including any whose looking value is
# NA, so the CI denominator assumes complete data — confirm that is intended.
avg_subj_target_looking_by_item <- d %>%
  filter(exclude_participant == 0) %>%
  filter(useable_window == 1) %>%
  distinct(sub_num, age, age_mo, child_gender, trial_order, trial_number, category,
           target_image, target_typicality_z, mean_target_looking_critical,
           mean_target_looking_baseline, corrected_target_looking) %>%
  group_by(sub_num, age, age_mo, child_gender, trial_order, category) %>%
  summarize(N = n(),
            mean_critical_accuracy = mean(mean_target_looking_critical, na.rm = TRUE),
            ci = qt(0.975, N - 1) * sd(mean_target_looking_critical, na.rm = TRUE) / sqrt(N),
            lower_ci = mean_critical_accuracy - ci,
            upper_ci = mean_critical_accuracy + ci,
            mean_baseline_accuracy = mean(mean_target_looking_baseline, na.rm = TRUE),
            baseline_ci = qt(0.975, N - 1) * sd(mean_target_looking_baseline, na.rm = TRUE) / sqrt(N),
            lower_baseline_ci = mean_baseline_accuracy - baseline_ci,
            upper_baseline_ci = mean_baseline_accuracy + baseline_ci)

#summarize average target looking across subjects
# Collapses the subject-level means to one row per category (item), with 95%
# t-based CIs across subjects for both windows.
# Fix: na.rm=T -> na.rm=TRUE (T is a reassignable binding, not a keyword).
avg_target_looking_by_item <- avg_subj_target_looking_by_item %>%
  group_by(category) %>%
  summarize(N = n(),
            critical_accuracy = mean(mean_critical_accuracy, na.rm = TRUE),
            ci = qt(0.975, N - 1) * sd(mean_critical_accuracy, na.rm = TRUE) / sqrt(N),
            lower_ci = critical_accuracy - ci,
            upper_ci = critical_accuracy + ci,
            baseline_accuracy = mean(mean_baseline_accuracy, na.rm = TRUE),
            baseline_ci = qt(0.975, N - 1) * sd(mean_baseline_accuracy, na.rm = TRUE) / sqrt(N),
            lower_baseline_ci = baseline_accuracy - baseline_ci,
            upper_baseline_ci = baseline_accuracy + baseline_ci)
avg_target_looking_by_item %>%
  knitr::kable()
category N critical_accuracy ci lower_ci upper_ci baseline_accuracy baseline_ci lower_baseline_ci upper_baseline_ci
bird 167 0.5304431 0.0235691 0.5068741 0.5540122 0.4566338 0.0172812 0.4393527 0.4739150
cat 168 0.5966936 0.0224388 0.5742548 0.6191324 0.5181834 0.0178337 0.5003497 0.5360171
dog 167 0.5761305 0.0211010 0.5550295 0.5972315 0.5313491 0.0152826 0.5160665 0.5466317
fish 167 0.5193975 0.0245478 0.4948497 0.5439453 0.4610437 0.0167226 0.4443211 0.4777663

Target looking during the baseline and critical window split by age

#summarize average target looking across subjects, split by age group
# (The original comment said "corrected target looking", but this block
# summarizes raw critical/baseline accuracy, not the baseline-corrected measure.)
# Children at exactly 16 months fall into the "younger" group (age_mo > 16).
# Fix: na.rm=T -> na.rm=TRUE (T is a reassignable binding, not a keyword).
avg_target_looking_by_item_by_age <- avg_subj_target_looking_by_item %>%
  mutate(age_group = ifelse(age_mo > 16, "older than 16 months", "younger than 16 months")) %>%
  group_by(age_group, category) %>%
  summarize(N = n(),
            critical_accuracy = mean(mean_critical_accuracy, na.rm = TRUE),
            ci = qt(0.975, N - 1) * sd(mean_critical_accuracy, na.rm = TRUE) / sqrt(N),
            lower_ci = critical_accuracy - ci,
            upper_ci = critical_accuracy + ci,
            baseline_accuracy = mean(mean_baseline_accuracy, na.rm = TRUE),
            baseline_ci = qt(0.975, N - 1) * sd(mean_baseline_accuracy, na.rm = TRUE) / sqrt(N),
            lower_baseline_ci = baseline_accuracy - baseline_ci,
            upper_baseline_ci = baseline_accuracy + baseline_ci)
avg_target_looking_by_item_by_age %>%
  knitr::kable()
age_group category N critical_accuracy ci lower_ci upper_ci baseline_accuracy baseline_ci lower_baseline_ci upper_baseline_ci
older than 16 months bird 70 0.5529627 0.0362112 0.5167515 0.5891739 0.4510531 0.0271639 0.4238892 0.4782170
older than 16 months cat 70 0.5970851 0.0315153 0.5655699 0.6286004 0.4953601 0.0268679 0.4684921 0.5222280
older than 16 months dog 70 0.5787090 0.0331379 0.5455711 0.6118469 0.5334788 0.0204303 0.5130486 0.5539091
older than 16 months fish 70 0.5679949 0.0337184 0.5342765 0.6017133 0.4641878 0.0298801 0.4343077 0.4940680
younger than 16 months bird 97 0.5141919 0.0311829 0.4830090 0.5453748 0.4606611 0.0227628 0.4378983 0.4834240
younger than 16 months cat 98 0.5964139 0.0316591 0.5647548 0.6280731 0.5344858 0.0236577 0.5108281 0.5581435
younger than 16 months dog 97 0.5742698 0.0278494 0.5464203 0.6021192 0.5298122 0.0220962 0.5077160 0.5519083
younger than 16 months fish 97 0.4843272 0.0333021 0.4510251 0.5176294 0.4587747 0.0195579 0.4392169 0.4783326

Overall baseline-corrected proportion target looking

Next, we investigate item-level variation in word recognition as measured by baseline-corrected proportion target looking (to help account for the baseline difference noted above).

#average corrected target looking by item for each subject
# One row per subject x category: mean baseline-corrected target looking with a
# 95% t-based CI. Children at exactly 16 months fall into the "younger" group.
# Fix: na.rm=T -> na.rm=TRUE (T is a reassignable binding, not a keyword).
avg_subj_corrected_target_looking_by_item <- d %>%
  filter(exclude_participant == 0) %>%
  filter(useable_window == 1) %>%
  mutate(age_group = ifelse(age_mo > 16, "older than 16 months", "younger than 16 months")) %>%
  distinct(sub_num, months, age_mo, age_group, child_gender, trial_order, trial_number,
           category, target_image, target_typicality_z, mean_target_looking_critical,
           mean_target_looking_baseline, corrected_target_looking) %>%
  group_by(sub_num, age_mo, age_group, child_gender, trial_order, category) %>%
  summarize(N = n(),
            average_corrected_target_looking = mean(corrected_target_looking, na.rm = TRUE),
            ci = qt(0.975, N - 1) * sd(corrected_target_looking, na.rm = TRUE) / sqrt(N),
            lower_ci = average_corrected_target_looking - ci,
            upper_ci = average_corrected_target_looking + ci)

#summarize average corrected target looking across subjects
# Collapses subject-level corrected looking to one row per category with a
# 95% t-based CI across subjects.
# Fix: na.rm=T -> na.rm=TRUE (T is a reassignable binding, not a keyword).
avg_corrected_target_looking_by_item <- avg_subj_corrected_target_looking_by_item %>%
  group_by(category) %>%
  summarize(N = n(),
            corrected_target_looking = mean(average_corrected_target_looking, na.rm = TRUE),
            ci = qt(0.975, N - 1) * sd(average_corrected_target_looking, na.rm = TRUE) / sqrt(N),
            lower_ci = corrected_target_looking - ci,
            upper_ci = corrected_target_looking + ci)
avg_corrected_target_looking_by_item %>%
  knitr::kable()
category N corrected_target_looking ci lower_ci upper_ci
bird 167 0.0738093 0.0289623 0.0448470 0.1027716
cat 168 0.0785102 0.0280725 0.0504377 0.1065827
dog 167 0.0447814 0.0253580 0.0194234 0.0701394
fish 167 0.0583538 0.0276439 0.0307099 0.0859978

Baseline-corrected proportion target looking split by age

#summarize average corrected target looking across subjects, split by age group
# Same summary as above, additionally grouped by the 16-month age split.
# Fix: na.rm=T -> na.rm=TRUE (T is a reassignable binding, not a keyword).
avg_corrected_target_looking_by_item_by_age <- avg_subj_corrected_target_looking_by_item %>%
  group_by(age_group, category) %>%
  summarize(N = n(),
            corrected_target_looking = mean(average_corrected_target_looking, na.rm = TRUE),
            ci = qt(0.975, N - 1) * sd(average_corrected_target_looking, na.rm = TRUE) / sqrt(N),
            lower_ci = corrected_target_looking - ci,
            upper_ci = corrected_target_looking + ci)
avg_corrected_target_looking_by_item_by_age %>%
  knitr::kable()
age_group category N corrected_target_looking ci lower_ci upper_ci
older than 16 months bird 70 0.1019096 0.0459486 0.0559610 0.1478582
older than 16 months cat 70 0.1017251 0.0428269 0.0588982 0.1445519
older than 16 months dog 70 0.0452301 0.0368266 0.0084036 0.0820567
older than 16 months fish 70 0.1038071 0.0415133 0.0622938 0.1453204
younger than 16 months bird 97 0.0535308 0.0374123 0.0161185 0.0909430
younger than 16 months cat 98 0.0619281 0.0374352 0.0244930 0.0993633
younger than 16 months dog 97 0.0444576 0.0351927 0.0092649 0.0796503
younger than 16 months fish 97 0.0255525 0.0361833 -0.0106308 0.0617358

Aim 3

To test whether individual differences in word recognition or typicality effects are predicted by differences in experiences with each exemplar.

3.1

#zscore parent report of typicality within participants
# Fix: mean()/sd() now use na.rm = TRUE. Without it, a single missing rating
# makes a participant's mean/sd NA, which NA-propagates through ALL of that
# participant's z-scores, and the downstream na.omit() then drops the
# participant entirely rather than just the missing trials.
parent_typicality_z <- d %>%
  group_by(sub_num) %>%
  mutate(target_parent_typicality_z =
           (target_parent_typicality_rating - mean(target_parent_typicality_rating, na.rm = TRUE)) /
           sd(target_parent_typicality_rating, na.rm = TRUE),
         distractor_parent_typicality_z =
           (distractor_parent_typicality_rating - mean(distractor_parent_typicality_rating, na.rm = TRUE)) /
           sd(distractor_parent_typicality_rating, na.rm = TRUE)) %>%
  ungroup()
  
# Per subject x target image: mean corrected target looking alongside the
# parent's typicality rating (raw and z-scored) for that image.
parent_typicality <- parent_typicality_z %>%
  filter(exclude_participant == 0) %>%
  filter(useable_window == 1) %>%
  # target_image appeared twice in the original distinct() call; listed once here
  distinct(sub_num, months, age_mo, child_gender, trial_order, trial_number, category,
           target_image, target_typicality_z, mean_target_looking_critical,
           mean_target_looking_baseline, corrected_target_looking,
           target_parent_typicality_z, distractor_parent_typicality_z,
           target_parent_typicality_rating, distractor_parent_typicality_rating) %>%
  group_by(sub_num, age_mo, target_image, category,
           target_parent_typicality_z, target_parent_typicality_rating) %>%
  summarize(N = n(),
            average_corrected_target_looking = mean(corrected_target_looking, na.rm = TRUE)) %>%
  # Drop rows with NA in ANY column. The original call was
  # na.omit(target_parent_typicality_rating); na.omit() silently ignores that
  # argument, so it was already dropping rows with NA anywhere — drop_na()
  # makes that explicit. To filter on the rating alone, use
  # filter(!is.na(target_parent_typicality_rating)) instead.
  drop_na()

#subject details for aim 3 analysis (how many participants have survey data)
# One-row summary: number of unique participants with parent ratings plus the
# mean and SD of their age in months.
aim3_subject_info <- parent_typicality %>%
  ungroup() %>%
  summarize(
    N = n_distinct(sub_num),
    mean_age = mean(age_mo),
    sd_age = sd(age_mo)
  )

aim3_subject_info %>%
  knitr::kable()
N mean_age sd_age
73 15.75181 1.481363
#model
# Aim 3.1: does parent-reported exemplar typicality predict baseline-corrected
# target looking, controlling for age, with by-subject typicality slopes and
# by-category intercepts?
m_3_1 <- lmer(
  average_corrected_target_looking ~ target_parent_typicality_z + age_mo +
    (target_parent_typicality_z | sub_num) + (1 | category),
  data = parent_typicality
)

summary(m_3_1)#singular fit
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: average_corrected_target_looking ~ target_parent_typicality_z +  
##     age_mo + (target_parent_typicality_z | sub_num) + (1 | category)
##    Data: parent_typicality
## 
## REML criterion at convergence: 1956.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.14393 -0.62834 -0.02457  0.67409  2.79414 
## 
## Random effects:
##  Groups   Name                       Variance  Std.Dev. Corr
##  sub_num  (Intercept)                0.0025253 0.050252     
##           target_parent_typicality_z 0.0000571 0.007556 1.00
##  category (Intercept)                0.0002544 0.015951     
##  Residual                            0.1184426 0.344155     
## Number of obs: 2683, groups:  sub_num, 73; category, 4
## 
## Fixed effects:
##                              Estimate Std. Error         df t value Pr(>|t|)  
## (Intercept)                 -0.087157   0.094115  79.014561  -0.926   0.3572  
## target_parent_typicality_z   0.007682   0.006774 782.750835   1.134   0.2572  
## age_mo                       0.010016   0.005941  77.053129   1.686   0.0959 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trg___
## trgt_prnt__ -0.008       
## age_mo      -0.992  0.017
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Wald 95% confidence intervals for the fixed effects; the random-effect
# standard deviations (.sig*) are reported as NA under method = "Wald".
confint(m_3_1,method="Wald")
##                                   2.5 %     97.5 %
## .sig01                               NA         NA
## .sig02                               NA         NA
## .sig03                               NA         NA
## .sig04                               NA         NA
## .sigma                               NA         NA
## (Intercept)                -0.271618051 0.09730458
## target_parent_typicality_z -0.005595628 0.02095922
## age_mo                     -0.001628631 0.02166042

Quick Visualization

#by category
# Parent-rated typicality (z) vs. corrected target looking, one panel per category
parent_typicality %>%
  ggplot(aes(x = target_parent_typicality_z, y = average_corrected_target_looking)) +
  geom_point(alpha = 0.1) +
  geom_smooth(method = "lm") +
  facet_wrap(~category)

#by subject
# Same relationship, one panel per participant
parent_typicality %>%
  ggplot(aes(x = target_parent_typicality_z, y = average_corrected_target_looking)) +
  geom_point(alpha = 0.1) +
  geom_smooth(method = "lm") +
  facet_wrap(~sub_num)

# distribution of the within-participant z-scored ratings
hist(parent_typicality$target_parent_typicality_z)

Aim 4

We will conduct a series of analyses to determine whether our results hold across a variety of different analytic decisions.

4.1. Window

4.2 excluding unknown words

We registered using CDI responses as a way to remove unknown words; however, we did not administer the CDI. Thus, we are not including this analysis, since it would require an arbitrary cutoff for word recognition as a proxy for understanding a word (i.e., what is the meaningful difference between 50% accuracy and 50.01% accuracy?).

Calculate CIs per word & per child

Remove any words where CIs overlap with the child's CI around chance

Model

Is there a typicality effect when we remove unknown words?

4.3 Reaction time as the dependent measure

We will fit models analogous to those in 1.1 and 1.2 using reaction time as our primary dependent measure rather than accuracy.

Load RT (computed using compute_RT.R & rt_helper.R)

# Load trial-level reaction times (computed by compute_RT.R / rt_helper.R)
rt_path <- here::here(
  "data_analysis", "registered_report", "data", "processed_data",
  "CATegories_exp2_RT_by_trial.csv"
)
d_rt <- read_csv(rt_path)

# attach the trial-level corrected-accuracy columns to the RT data
d_rt <- trial_corrected_accuracy %>%
  left_join(d_rt)

Visualization of RTs

# Distribution of RTs for distractor-to-target ("D-T") shifts.
# NOTE(review): this plots shift_start_rt, while the models below use `rt` —
# confirm both columns exist in d_rt and that this is the intended variable.
hist(filter(d_rt, shift_type=="D-T")$shift_start_rt)

The data are right skewed, which is common for RTs. We will use log transformations in the subsequent models to account for the distribution of the data.

Models

4.1 Subject-level RT

# Subject-level mean RT (D-T shifts only) per typicality condition, with a
# 95% t-based CI per subject x condition cell.
# Fix: na.rm=T -> na.rm=TRUE (T is a reassignable binding, not a keyword).
avg_subj_RT <- d_rt %>%
  filter(shift_type == "D-T") %>%
  group_by(sub_num, child_gender, condition) %>%
  summarize(N = n(),
            average_RT = mean(rt, na.rm = TRUE),
            ci = qt(0.975, N - 1) * sd(rt, na.rm = TRUE) / sqrt(N),
            lower_ci = average_RT - ci,
            upper_ci = average_RT + ci)
  

# Add the three typicality codings, all derived from one 0/1 indicator:
#   typicality_condition_c:    atypical = -0.5, typical = 0.5  (centered)
#   typicality_condition_typ:  atypical = -1,   typical = 0    (intercept = typical)
#   typicality_condition_atyp: atypical =  0,   typical = 1    (intercept = atypical)
# Any other condition value maps to NA.
avg_subj_RT <- avg_subj_RT %>%
  mutate(
    is_typical = case_when(
      condition == "typical" ~ 1,
      condition == "atypical" ~ 0,
      TRUE ~ NA_real_
    ),
    typicality_condition_c = is_typical - 0.5,
    typicality_condition_typ = is_typical - 1,
    typicality_condition_atyp = is_typical
  ) %>%
  select(-is_typical)

# Log RT on the centered typicality contrast, with by-subject random intercepts
m_4_1 <- lmer(
  log(average_RT) ~ 1 + typicality_condition_c + (1 | sub_num),
  data = avg_subj_RT
)

summary(m_4_1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(average_RT) ~ 1 + typicality_condition_c + (1 | sub_num)
##    Data: avg_subj_RT
## 
## REML criterion at convergence: 101.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.44553 -0.58448  0.06917  0.55077  1.80974 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sub_num  (Intercept) 0.04722  0.2173  
##  Residual             0.06435  0.2537  
## Number of obs: 172, groups:  sub_num, 86
## 
## Fixed effects:
##                        Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)             6.63866    0.03038 85.00000 218.494   <2e-16 ***
## typicality_condition_c -0.05198    0.03869 85.00000  -1.344    0.183    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## typclty_cn_ 0.000
# Wald 95% confidence intervals for the fixed effects of the centered model;
# random-effect standard deviations are NA under method = "Wald".
confint(m_4_1,method="Wald")
##                             2.5 %     97.5 %
## .sig01                         NA         NA
## .sigma                         NA         NA
## (Intercept)             6.5791045 6.69820650
## typicality_condition_c -0.1277983 0.02384749

There is no significant typicality effect.

4.1.1 RT for typical

# Same model with the "typical-reference" coding (typical = 0), so the
# intercept estimates log RT in the typical condition
m_4_1_1 <- lmer(
  log(average_RT) ~ 1 + typicality_condition_typ + (1 | sub_num),
  data = avg_subj_RT
)

summary(m_4_1_1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(average_RT) ~ 1 + typicality_condition_typ + (1 | sub_num)
##    Data: avg_subj_RT
## 
## REML criterion at convergence: 101.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.44553 -0.58448  0.06917  0.55077  1.80974 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sub_num  (Intercept) 0.04722  0.2173  
##  Residual             0.06435  0.2537  
## Number of obs: 172, groups:  sub_num, 86
## 
## Fixed effects:
##                           Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                6.61267    0.03602 144.17826 183.592   <2e-16 ***
## typicality_condition_typ  -0.05198    0.03869  85.00000  -1.344    0.183    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## typclty_cn_ 0.537
# Wald 95% confidence intervals for the typical-reference model's fixed effects
confint(m_4_1_1,method="Wald")
##                               2.5 %     97.5 %
## .sig01                           NA         NA
## .sigma                           NA         NA
## (Intercept)               6.5420732 6.68326241
## typicality_condition_typ -0.1277983 0.02384749

RT for atypical

# Same model with the "atypical-reference" coding (atypical = 0), so the
# intercept estimates log RT in the atypical condition
m_4_1_2 <- lmer(
  log(average_RT) ~ 1 + typicality_condition_atyp + (1 | sub_num),
  data = avg_subj_RT
)

summary(m_4_1_2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(average_RT) ~ 1 + typicality_condition_atyp + (1 | sub_num)
##    Data: avg_subj_RT
## 
## REML criterion at convergence: 101.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.44553 -0.58448  0.06917  0.55077  1.80974 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sub_num  (Intercept) 0.04722  0.2173  
##  Residual             0.06435  0.2537  
## Number of obs: 172, groups:  sub_num, 86
## 
## Fixed effects:
##                            Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                 6.66464    0.03602 144.17826 185.035   <2e-16 ***
## typicality_condition_atyp  -0.05198    0.03869  85.00000  -1.344    0.183    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## typclty_cn_ -0.537
# Wald 95% confidence intervals for the atypical-reference model's fixed effects
confint(m_4_1_2,method="Wald")
##                                2.5 %     97.5 %
## .sig01                            NA         NA
## .sigma                            NA         NA
## (Intercept)                6.5940486 6.73523780
## typicality_condition_atyp -0.1277983 0.02384749

4.2 Trial level RT

# Trial-level RTs for D-T shifts, with the three typicality codings derived
# from one 0/1 indicator:
#   typicality_condition_c:    atypical = -0.5, typical = 0.5  (centered)
#   typicality_condition_typ:  atypical = -1,   typical = 0    (intercept = typical)
#   typicality_condition_atyp: atypical =  0,   typical = 1    (intercept = atypical)
# Any other condition value maps to NA.
trial_rt <- d_rt %>%
  filter(shift_type == "D-T") %>%
  mutate(
    is_typical = case_when(
      condition == "typical" ~ 1,
      condition == "atypical" ~ 0,
      TRUE ~ NA_real_
    ),
    typicality_condition_c = is_typical - 0.5,
    typicality_condition_typ = is_typical - 1,
    typicality_condition_atyp = is_typical
  ) %>%
  select(-is_typical)

# Trial-level log RT on the centered typicality contrast, with by-subject
# random slopes for typicality and by-category random intercepts
m_4_2 <- lmer(
  log(rt) ~ 1 + typicality_condition_c +
    (1 + typicality_condition_c | sub_num) +
    (1 | category),
  data = trial_rt
)

summary(m_4_2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(rt) ~ 1 + typicality_condition_c + (1 + typicality_condition_c |  
##     sub_num) + (1 | category)
##    Data: trial_rt
## 
## REML criterion at convergence: 3274.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8850 -0.6552  0.1045  0.6364  2.4486 
## 
## Random effects:
##  Groups   Name                   Variance  Std.Dev. Corr 
##  sub_num  (Intercept)            0.0259295 0.16103       
##           typicality_condition_c 0.0079097 0.08894  -1.00
##  category (Intercept)            0.0009335 0.03055       
##  Residual                        0.5603636 0.74857       
## Number of obs: 1423, groups:  sub_num, 86; category, 4
## 
## Fixed effects:
##                         Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)              6.41260    0.03072   6.44238 208.729 1.39e-13 ***
## typicality_condition_c  -0.03398    0.04120 370.63537  -0.825     0.41    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## typclty_cn_ -0.128
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Fix: the original called confint(m_4_1_2) here — a copy-paste from the
# previous section (the printed output below shows the m_4_1_2 coefficients).
# This section's trial-level model is m_4_2.
confint(m_4_2, method = "Wald")
##                                2.5 %     97.5 %
## .sig01                            NA         NA
## .sigma                            NA         NA
## (Intercept)                6.5940486 6.73523780
## typicality_condition_atyp -0.1277983 0.02384749